-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LTO] Drop the weak function if there is a non-weak global symbol #76287
Conversation
@llvm/pr-subscribers-llvm-binary-utilities @llvm/pr-subscribers-lto Author: DianQK (DianQK) ChangesAlternative to #76040. This PR only implements the handling of the weak function. We should also be able to use Full diff: https://github.com/llvm/llvm-project/pull/76287.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Object/ModuleSymbolTable.h b/llvm/include/llvm/Object/ModuleSymbolTable.h
index 1134b98c2247e2..3f2834c2ff56d1 100644
--- a/llvm/include/llvm/Object/ModuleSymbolTable.h
+++ b/llvm/include/llvm/Object/ModuleSymbolTable.h
@@ -48,6 +48,7 @@ class ModuleSymbolTable {
void printSymbolName(raw_ostream &OS, Symbol S) const;
uint32_t getSymbolFlags(Symbol S) const;
+ static bool canParseInlineASM(const Module &M);
/// Parse inline ASM and collect the symbols that are defined or referenced in
/// the current module.
diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp
index 4fe1f1a0f51833..11d7f097ea6200 100644
--- a/llvm/lib/Linker/LinkModules.cpp
+++ b/llvm/lib/Linker/LinkModules.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Linker/Linker.h"
+#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Support/Error.h"
using namespace llvm;
@@ -32,6 +33,7 @@ class ModuleLinker {
std::unique_ptr<Module> SrcM;
SetVector<GlobalValue *> ValuesToLink;
+ StringSet<> DstGlobalAsmSymbols;
/// For symbol clashes, prefer those from Src.
unsigned Flags;
@@ -79,14 +81,17 @@ class ModuleLinker {
/// Given a global in the source module, return the global in the
/// destination module that is being linked to, if any.
GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
- Module &DstM = Mover.getModule();
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
return nullptr;
+ return getLinkedToGlobal(SrcGV->getName());
+ }
+ GlobalValue *getLinkedToGlobal(StringRef Name) {
+ Module &DstM = Mover.getModule();
// Otherwise see if we have a match in the destination module's symtab.
- GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
+ GlobalValue *DGV = DstM.getNamedValue(Name);
if (!DGV)
return nullptr;
@@ -106,6 +111,9 @@ class ModuleLinker {
bool linkIfNeeded(GlobalValue &GV, SmallVectorImpl<GlobalValue *> &GVToClone);
+ // Drop GV if it is a weak linkage and the asm symbol is not weak.
+ void dropWeakGVForAsm();
+
public:
ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
std::function<void(Module &, const StringSet<> &)>
@@ -128,6 +136,29 @@ getMinVisibility(GlobalValue::VisibilityTypes A,
return GlobalValue::DefaultVisibility;
}
+static void changeDefToDecl(GlobalValue &GV) {
+ if (auto *F = dyn_cast<Function>(&GV)) {
+ F->deleteBody();
+ } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
+ Var->setInitializer(nullptr);
+ } else {
+ auto &Alias = cast<GlobalAlias>(GV);
+ Module &M = *Alias.getParent();
+ GlobalValue *Declaration;
+ if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
+ Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
+ } else {
+ Declaration =
+ new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false,
+ GlobalValue::ExternalLinkage,
+ /*Initializer*/ nullptr);
+ }
+ Declaration->takeName(&Alias);
+ Alias.replaceAllUsesWith(Declaration);
+ Alias.eraseFromParent();
+ }
+}
+
bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar) {
const GlobalValue *GVal = M.getNamedValue(ComdatName);
@@ -325,6 +356,34 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
"': symbol multiply defined!");
}
+void ModuleLinker::dropWeakGVForAsm() {
+ Module &DstM = Mover.getModule();
+ if (!ModuleSymbolTable::canParseInlineASM(DstM))
+ return;
+ ModuleSymbolTable::CollectAsmSymbols(
+ DstM, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ if (Flags & object::BasicSymbolRef::SF_Global &&
+ !(Flags & object::BasicSymbolRef::SF_Weak))
+ DstGlobalAsmSymbols.insert(Name);
+ });
+
+ if (!ModuleSymbolTable::canParseInlineASM(*SrcM))
+ return;
+ ModuleSymbolTable::CollectAsmSymbols(
+ *SrcM, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ if (Flags & object::BasicSymbolRef::SF_Global &&
+ !(Flags & object::BasicSymbolRef::SF_Weak)) {
+ auto *DstGV = getLinkedToGlobal(Name);
+ if (DstGV && DstGV->hasWeakLinkage()) {
+ if (DstGV->use_empty())
+ DstGV->eraseFromParent();
+ else
+ changeDefToDecl(*DstGV);
+ }
+ }
+ });
+}
+
bool ModuleLinker::linkIfNeeded(GlobalValue &GV,
SmallVectorImpl<GlobalValue *> &GVToClone) {
GlobalValue *DGV = getLinkedToGlobal(&GV);
@@ -394,8 +453,14 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV,
return true;
if (DGV && ComdatFrom == LinkFrom::Both)
GVToClone.push_back(LinkFromSrc ? DGV : &GV);
- if (LinkFromSrc)
+ if (!DGV && GV.hasWeakLinkage() &&
+ DstGlobalAsmSymbols.contains(GV.getName())) {
+ changeDefToDecl(GV);
+ LinkFromSrc = false;
+ }
+ if (LinkFromSrc) {
ValuesToLink.insert(&GV);
+ }
return false;
}
@@ -436,27 +501,7 @@ void ModuleLinker::dropReplacedComdat(
GV.eraseFromParent();
return;
}
-
- if (auto *F = dyn_cast<Function>(&GV)) {
- F->deleteBody();
- } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
- Var->setInitializer(nullptr);
- } else {
- auto &Alias = cast<GlobalAlias>(GV);
- Module &M = *Alias.getParent();
- GlobalValue *Declaration;
- if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
- Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
- } else {
- Declaration =
- new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false,
- GlobalValue::ExternalLinkage,
- /*Initializer*/ nullptr);
- }
- Declaration->takeName(&Alias);
- Alias.replaceAllUsesWith(Declaration);
- Alias.eraseFromParent();
- }
+ changeDefToDecl(GV);
}
bool ModuleLinker::run() {
@@ -533,6 +578,7 @@ bool ModuleLinker::run() {
if (const Comdat *SC = GA.getComdat())
LazyComdatMembers[SC].push_back(&GA);
+ dropWeakGVForAsm();
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
SmallVector<GlobalValue *, 0> GVToClone;
diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp
index 07f76688fa43e7..d96d2d8caafbae 100644
--- a/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -140,6 +140,13 @@ initializeRecordStreamer(const Module &M,
Init(Streamer);
}
+bool ModuleSymbolTable::canParseInlineASM(const Module &M) {
+ std::string Err;
+ const Triple TT(M.getTargetTriple());
+ const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
+ return T && T->hasMCAsmParser();
+}
+
void ModuleSymbolTable::CollectAsmSymbols(
const Module &M,
function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) {
diff --git a/llvm/test/LTO/X86/inline-asm-lto-weak.ll b/llvm/test/LTO/X86/inline-asm-lto-weak.ll
new file mode 100644
index 00000000000000..b8d694835d6f7a
--- /dev/null
+++ b/llvm/test/LTO/X86/inline-asm-lto-weak.ll
@@ -0,0 +1,84 @@
+; RUN: split-file %s %t
+; RUN: opt %t/asm_global.ll -o %t/asm_global.bc
+; RUN: opt %t/asm_weak.ll -o %t/asm_weak.bc
+; RUN: opt %t/fn_global.ll -o %t/fn_global.bc
+; RUN: opt %t/fn_weak.ll -o %t/fn_weak.bc
+
+; RUN: not llvm-lto %t/asm_global.bc %t/fn_global.bc -o %to1.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llvm-lto %t/fn_global.bc %t/asm_global.bc -o %to1.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+
+; RUN: llvm-lto %t/asm_global.bc %t/fn_weak.bc -o %to2.o --save-linked-module --exported-symbol=foo
+; RUN: llvm-dis < %to2.o.linked.bc | FileCheck %s --check-prefix=ASM_GLOBAL
+; RUN: llvm-lto %t/fn_weak.bc %t/asm_global.bc -o %to2.o --save-linked-module --exported-symbol=foo
+; RUN: llvm-dis < %to2.o.linked.bc | FileCheck %s --check-prefix=ASM_GLOBAL
+
+; FIXME: We want to drop the weak asm symbol.
+; RUN: not llvm-lto %t/asm_global.bc %t/asm_weak.bc -o %to3.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llvm-lto %t/asm_weak.bc %t/asm_global.bc -o %to3.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+
+; FIXME: We want to drop the weak asm symbol.
+; RUN: not llvm-lto %t/asm_weak.bc %t/fn_global.bc -o %to4.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR2
+; RUN: not llvm-lto %t/fn_global.bc %t/asm_weak.bc -o %to4.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR2
+
+; RUN: not llvm-lto %t/asm_weak.bc %t/fn_weak.bc -o %to5.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llvm-lto %t/fn_weak.bc %t/asm_weak.bc -o %to5.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+
+; Drop the weak function.
+; RUN: llvm-lto %t/fn_global.bc %t/fn_weak.bc -o %to6.o --save-linked-module --exported-symbol=foo
+; RUN: llvm-dis < %to6.o.linked.bc | FileCheck %s --check-prefix=FN_GLOBAL
+; RUN: llvm-lto %t/fn_weak.bc %t/fn_global.bc -o %to6.o --save-linked-module --exported-symbol=foo
+; RUN: llvm-dis < %to6.o.linked.bc | FileCheck %s --check-prefix=FN_GLOBAL
+
+; ERR: error: <unknown>:0: symbol 'foo' is already defined
+; ERR2: error: <unknown>:0: foo changed binding to STB_GLOBAL
+
+; FN_GLOBAL: define i32 @foo(i32 %i)
+
+; ASM_GLOBAL: module asm ".global foo"
+; ASM_GLOBAL-NOT: define i32 @foo(i32 %i)
+
+;--- asm_global.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".global foo"
+module asm "foo:"
+module asm "leal 1(%rdi), %eax"
+module asm "retq"
+
+;--- asm_weak.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".weak foo"
+module asm "foo:"
+module asm "leal 2(%rdi), %eax"
+module asm "retq"
+
+;--- fn_global.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32 %i) {
+ %r = add i32 %i, 3
+ ret i32 %r
+}
+
+define internal i32 @bar(i32 %i) {
+ %r = call i32 @foo(i32 %i)
+ ret i32 %r
+}
+
+;--- fn_weak.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define weak i32 @foo(i32 %i) {
+ %r = add i32 %i, 4
+ ret i32 %r
+}
+
+define internal i32 @bar(i32 %i) {
+ %r = call i32 @foo(i32 %i)
+ ret i32 %r
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cachemeifyoucan since afaict this is specific to users of the legacy LTO interfaces. Hoping you can give some insight into where this is best handled and how it works via ld64.
I don't think the handling should be down in the ModuleLinker. To clarify, this does not appear to be a general LTO issue, but you are changing the module linking used by some of the legacy LTO interfaces (e.g. llvm-lto and llvm-link). The new LTO api used by lld, gold plugin, and llvm-lto2 handle this already (in llvm/lib/LTO/LTO.cpp, we decide which symbols to keep when invoking the IRMover based on prevailing info from the linker, and it also processes the asm symbols in the ModuleSymbolTable).
I also believe this is likely handled ok by ld64 which uses the legacy LTO APIs as well, because the ModuleSymbolTable already contains parsed asm symbols, and it has ways of querying that information.
Looking at the linked earlier alternative PR, it looks like this is to fix rust LTO linking that uses llvm-link directly? Here though the tests use llvm-lto. Is rust using one of these tools or is it interfacing with the module linking directly via LLVM libraries? I'm really not sure where in the legacy LTO this handling should go. Note that llvm-lto already creates an LTOModule that contains a ModuleSymbolTable that already has these asm symbols.
Rustc's LLVM backend directly interfaces with libLLVM both for generating the LLVM modules as well as doing LTO. This is necessary for what rustc calls "thin local lto". This is thinLTO, except only applying to a single crate (library/smallest compilation unit) rather than than the linker input. This mode was introduced to allow creating multiple codegen units for a single crate in release mode (to improve build performance) without hurting runtime performance due to not being able to do optimizations across codegen units. Having rustc do LTO using libLLVM is also a lot easier than convincing the linker to use the right libLTO for the LLVM version used by rustc. While for most linkers rustc could ship a matching libLTO, this wouldn't work for lld as far as I understand it, so if the system installation of LLVM is older than rustc (which it is for most users as rustc uses the latest LLVM version while distros generally keep an older one for the entire lifespan of the distro), then you are out of luck. Rustc does support That said for fat LTO I believe we use For thinLTO it seems we duplicated a fair amount of logic from LLVM: https://github.com/rust-lang/rust/blob/e1fadb2c35a6082867a037f012bfdfc5eb686211/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp#L1242-L1355 for the global analysis and then https://github.com/rust-lang/rust/blob/e1fadb2c35a6082867a037f012bfdfc5eb686211/compiler/rustc_codegen_llvm/src/back/lto.rs#L726-L781 for the per-module optimizations. (The last code snippet references some functions named |
If I understand correctly, the new LTO gives control of symbol resolution to the caller? This way rustc can choose which symbols to use, just like llvm-lto2 or lld. I tried it at rust-lang/rust#119348.
@bjorn3 has provided a detailed explanation. The current behavior is more like the legacy LTO.
So will we continue to maintain the legacy LTO? |
@teresajohnson This is In the context of fullLTO, In a more general context, apple linker is good at dead-stripping, and it also picks non-weak symbols over weak symbols, so adding such new function in LTO should not affect ld64.
If rust compiler has idea of which symbol to use, using IRMover is probably better since it gives you much better control of the behavior, rather than rely on a single builtin behavior in ModuleLinker.
While it is not required to implement new features that are available in new LTO interface in legacy LTO interface, please don't break legacy LTO since we still have users of them. |
The associated PR has been reverted in rust-lang/rust#119885. |
Forgot to respond to this question before. Right, lld and other new LTO API users do not use the ModuleLinker, but rather invoke the IRMover directly. |
Thank you for your response. |
Alternative to #76040.
This PR only implements the handling of the weak function.
We should also be able to use
.lto_discard
to handle weak asm symbols. However, we may require additional effort to enhance the maintenance of.lto_discard
.